
library(tidyverse)
library(rio)

# set replication folder as working directory
# NOTE(review): "~replication" has no slash after the tilde, so it may not
# resolve to a "replication" folder inside the home directory
# ("~/replication") -- confirm the intended path before running.
setwd("~replication")

# import data - now including completed and non-completed answers
# load() restores the objects stored in the .rdata file into the workspace;
# the code below expects this to provide a data frame called df_wide.
load("data_genderedcost_wide.rdata")

# The wide data set must be reshaped to long format -- the respondent serial (Id) is the ID


##### Make data tidy ######
# Keep the respondent id plus every conjoint_* column, then drop the conjoint
# columns that are not displayed attribute levels: design metadata, the label
# texts of attribute 1 and the seven choice outcomes (outcome variables and
# the like mess up the pivoting further down).
dataTask <- df_wide %>%
  rename(id = Id) %>%
  select(id, starts_with("conjoint_")) %>%
  select(!all_of(c(
    "conjoint_version", "conjoint_design", "conjoint_attr_order",
    # these three variables are used to make sure that formand is borgmester
    # in CPH and Rådmand in magistrat municipalities
    paste0("conjoint_attr_1_lvl_", 1:3, "_Text"),
    # outcome variables, one per task
    paste0("conjoint_", 1:7, "_choice")
  )))

# How to read the conjoint column names:
#   conjoint  = experiment/task number
#   Concept   = profile shown (left or right)
#   attribute = attribute (1 = position, 2 = remuneration, 3 = work load,
#               4 = work environment)
#   choice    = conjoint choice (outcome; handled separately further down)
#
# Reading a column name from left to right:
#   1st number = experiment/task number
#   2nd number = profile 1 or 2
#   3rd number = attribute

### Strip the words "conjoint_", "Concept_" and "attribute_" so that each
### column name is only a sequence of numbers. The pivot below splits those
### numbers up and inserts them as values of the new key columns.
names(dataTask) <- names(dataTask) %>%
  str_remove_all("conjoint_") %>%
  str_remove_all("Concept_") %>%
  str_remove_all("attribute_")


# Reshape to one row per respondent x task x profile x attribute.
# The remaining column names have the form "<task>_<profile>_<attribute>",
# which names_sep splits into the three key columns.
dataTask_long <- dataTask %>%
  # drop any leftover non-attribute columns before pivoting
  # (typo fixed: the pattern used to be "coice", which can never match a
  # choice column)
  select(-c(ends_with("choice"),
            starts_with("attr_order"))) %>%
  pivot_longer(cols = !id,
               names_to = c("task_number", "profile", "attribute"),
               names_sep = "_",
               values_to = "attribute_level")

### dataTask_long is longer than needed: it holds one row per attribute of
### each profile. A row should be a profile, with one column per attribute.

# spread the attributes into the columns attribute_1 ... attribute_4
dataTask_tidy <- pivot_wider(
  dataTask_long,
  names_from = attribute,
  values_from = attribute_level,
  names_prefix = "attribute_"
)

# for feature 1 - the position - names changed depending on the municipality the candidate ran in.


# Give the attribute columns descriptive names, then replace the level codes
# 1/2/3 by text labels stored as factors. The factor levels are listed in the
# same order as the codes, so the level order follows 1, 2, 3.
dataTask_tidy <- dataTask_tidy %>%
  rename(position = attribute_1,
         remuneration = attribute_2,
         workload = attribute_3,
         work_environment = attribute_4) %>%
  mutate(
    position = factor(
      case_when(position == 1 ~ "Common Member",
                position == 2 ~ "Committee chair - small influence",
                position == 3 ~ "Committee chair - large influence"),
      levels = c("Common Member",
                 "Committee chair - small influence",
                 "Committee chair - large influence")
    ),
    remuneration = factor(
      case_when(remuneration == 1 ~ "10% lower remuneration",
                remuneration == 2 ~ "Normal remuneration",
                remuneration == 3 ~ "10% higher remuneration"),
      levels = c("10% lower remuneration",
                 "Normal remuneration",
                 "10% higher remuneration")
    ),
    workload = factor(
      case_when(workload == 1 ~ "10% lower workload",
                workload == 2 ~ "Normal workload",
                workload == 3 ~ "10% higher workload"),
      levels = c("10% lower workload",
                 "Normal workload",
                 "10% higher workload")
    ),
    work_environment = factor(
      # level 1 was previously labelled "Characterized by equality"
      case_when(work_environment == 1 ~ "Characterized by mutual respect",
                work_environment == 2 ~ "Several experienced harassment",
                work_environment == 3 ~ "Several experienced sexism and harassment"),
      levels = c("Characterized by mutual respect",
                 "Several experienced harassment",
                 "Several experienced sexism and harassment")
    )
  )


####### ADD EXPERIMENT CHOICES TO LONG DF OF TASKS - I.E. ADDING OUTCOMES

# Outcome columns only: the respondent id plus the choice made in each of the
# seven tasks.
dataChoice <- df_wide %>%
  rename(id = Id) %>%
  select(id, all_of(paste0("conjoint_", 1:7, "_choice")))


# Reduce "conjoint_<n>_choice" to the bare task number "<n>", so that the
# column names become the task_number values when pivoting.
names(dataChoice) <- names(dataChoice) %>%
  str_remove_all("conjoint_") %>%
  str_remove_all("_choice")


# long choice data: one row per respondent x task with the chosen profile
data_choice_tidy <- pivot_longer(
  dataChoice,
  cols = -id,
  names_to = "task_number",
  values_to = "profile_choice"
)

# Merge the long task data with the long choice data, so every profile row
# carries the choice made in its task, and create the 0/1 outcome variable:
# 1 when the profile number of the row equals the chosen profile.
# The join keys are spelled out (id and task_number are the only columns the
# two data frames share) so the merge does not rely on implicit name matching.
df_long <- dataTask_tidy %>%
  left_join(data_choice_tidy, by = c("id", "task_number")) %>%
  # profile comes from the column names and is therefore a string; == coerces
  # both sides to a common type before comparing
  mutate(choice = ifelse(profile_choice == profile, 1, 0))

#############################################################
##### CREATE COLUMNS FOR DOMINANT OPTIONS AND ATTENTION #####
#############################################################


# Spread the long data to one row per respondent x task, with one column per
# profile/attribute combination: profile_attribute_<profile>_<attribute>.
dataTask_dom <- dataTask_long %>%
  pivot_wider(names_from = c(profile, attribute),
              values_from = attribute_level,
              names_prefix = "profile_attribute_") %>%
  # rename to keep track of things: a_* = profile 1, b_* = profile 2
  rename(all_of(c(
    a_position           = "profile_attribute_1_1",
    a_remuneration       = "profile_attribute_1_2",
    a_workload           = "profile_attribute_1_3",
    a_workingenvironment = "profile_attribute_1_4",
    b_position           = "profile_attribute_2_1",
    b_remuneration       = "profile_attribute_2_2",
    b_workload           = "profile_attribute_2_3",
    b_workingenvironment = "profile_attribute_2_4"
  )))

## Each row of dataTask_dom is one task, with one column for each realized
## attribute level of profile a and of profile b.

# For position and remuneration a higher value is the better level, while for
# workload and work environment a lower value is the better level.
# A profile is dominant when it is at least as good as the other profile on
# all four attributes and the two profiles are not identical.

df_dom <- dataTask_dom %>%
  mutate(
    # FIX: identical profiles must be tested first. case_when() uses the first
    # condition that matches, and identical profiles also satisfy the weak
    # dominance condition (>= / <=). With the tie test in second place it was
    # unreachable, so ties were coded as dominant for both sides.
    a_dominant = case_when(
      a_position == b_position & a_remuneration == b_remuneration &
        a_workload == b_workload &
        a_workingenvironment == b_workingenvironment ~ 0,
      a_position >= b_position & a_remuneration >= b_remuneration &
        a_workload <= b_workload &
        a_workingenvironment <= b_workingenvironment ~ 1,
      TRUE ~ 0
    ),
    b_dominant = case_when(
      a_position == b_position & a_remuneration == b_remuneration &
        a_workload == b_workload &
        a_workingenvironment == b_workingenvironment ~ 0,
      b_position >= a_position & b_remuneration >= a_remuneration &
        b_workload <= a_workload &
        b_workingenvironment <= a_workingenvironment ~ 1,
      TRUE ~ 0
    )
  ) %>%
  # dummy for whether the respondent is exposed to a dominant option
  mutate(dominant_exposure = ifelse(a_dominant == 1 | b_dominant == 1, 1, 0))

# share of tasks that contain a dominant option
# NOTE(review): the figure noted here before (33.29 percent) was presumably
# computed while identical profiles still counted as dominant -- re-run and
# update the reported share.
mean(df_dom$dominant_exposure)

## Merge the long df with the task-level dominance df.
## The join keys are spelled out (id and task_number are the only shared
## columns) so the merge does not rely on implicit name matching.
df_long <- df_long %>%
  left_join(df_dom, by = c("id", "task_number")) %>%
  # dummy: the respondent was shown a dominant option and chose it
  mutate(take_dominant = case_when(profile_choice == 1 & a_dominant == 1 ~ 1,
                                   profile_choice == 2 & b_dominant == 1 ~ 1,
                                   TRUE ~ 0)) %>%
  # dummy for inattention -- inferred from failing to select the dominant
  # option: a task with a dominant option where it was not chosen.
  # NOTE(review): rows with a missing profile_choice fall through to TRUE ~ 0
  # above, so they are flagged inattentive whenever a dominant option was
  # shown -- confirm this is intended for non-completed answers.
  mutate(inattentive = ifelse(dominant_exposure == 1 & take_dominant == 0, 1, 0))

#######################################
##### CREATE BACKGROUND VARIABLES #####
#######################################

## Demographics and covariates: every non-conjoint column of df_wide. The
## recorded attribute order is renamed to `order` first, so it is kept when
## the conjoint_* columns are dropped.
df_background <- df_wide %>%
  rename(id = Id, order = conjoint_attr_order_record) %>%
  dplyr::select(!starts_with("conjoint_"))

# Recode the raw survey answers into labelled analysis variables; categorical
# variables with a natural order become factors with explicit level order.
df_background <- df_background %>%
  mutate(
    woman = ifelse(c_Koen == "Kvinde", "Woman", "Man"),
    working_hours = as.numeric(arbejdstid),
    sex = as.factor(woman),
    # presumably alder_o1 is the year of birth -- confirm
    age = 2021 - as.numeric(alder_o1),
    # NOTE(review): code 3 (specialarbejderudd.) is labelled "Other", so the
    # factor level "Special worker education" below is never used -- confirm
    # that merging it into "Other" is intended.
    education = factor(
      case_when(uddannelse == 1 ~ "Primary School",
                uddannelse == 2 ~ "High School",
                uddannelse == 3 ~ "Other", # specialarbejderudd.
                uddannelse == 4 ~ "Vocational", # erhvervsudd.
                uddannelse == 5 ~ "Short, higher education (>3 years)",
                uddannelse == 6 ~ "Bachelor/professional degree",
                uddannelse == 7 ~ "Long, higher",
                uddannelse == 8 ~ "Other"),
      levels = c("Primary School", "High School",
                 "Special worker education", "Vocational",
                 "Short, higher education (>3 years)",
                 "Bachelor/professional degree", "Long, higher", "Other")
    ),
    electoral_performance = case_when(
      kom_valg == 1 ~ "First-time elected",
      kom_valg == 2 ~ "Re-elected",
      kom_valg == 3 ~ "Not elected, but elected previously",
      kom_valg == 4 ~ "Not elected, not elected previously"
    ),
    # every value other than 2 or 3 (including missing) is coded "No"
    previously_elected = factor(
      case_when(kom_valg == 2 ~ "Yes",
                kom_valg == 3 ~ "Yes",
                TRUE ~ "No")
    ),
    marital_status = factor(
      case_when(civilstatus == 1 ~ "Married",
                civilstatus == 2 ~ "In a relationship",
                civilstatus == 3 ~ "Single",
                civilstatus == 4 ~ "Do not want to provide"),
      levels = c("Married", "In a relationship", "Single",
                 "Do not want to provide")
    ),
    harassment_risk = factor(
      case_when(risiko == 1 ~ "Very low risk",
                risiko == 2 ~ "Low risk",
                risiko == 3 ~ "Neither or",
                risiko == 4 ~ "High risk",
                risiko == 5 ~ "Very high risk",
                risiko == 6 ~ "Do not know"),
      levels = c("Very low risk",
                 "Low risk",
                 "Neither or",
                 "High risk",
                 "Very high risk",
                 "Do not know")
    ),
    # keep only the first two characters of the ideology scale (this drops the
    # "very leftwing/rightwing" text that follows 0/10) and make it numeric
    ideology = as.numeric(substr(politik_skala, 1, 2))
  )


## Victims of sexual harassment
df_background <- df_background %>%
  mutate(
    # one 0/1 dummy per statement about experiences: 1 when the response is 1
    kraenk_1_dummy = ifelse(kraenkelser_1_resp == 1, 1, 0),
    kraenk_2_dummy = ifelse(kraenkelser_2_resp == 1, 1, 0),
    kraenk_3_dummy = ifelse(kraenkelser_3_resp == 1, 1, 0),
    kraenk_4_dummy = ifelse(kraenkelser_4_resp == 1, 1, 0),
    kraenk_5_dummy = ifelse(kraenkelser_5_resp == 1, 1, 0),
    # intensity of victimhood: the sum of the five dummies
    victimhood_intensity = kraenk_1_dummy + kraenk_2_dummy + kraenk_3_dummy +
      kraenk_4_dummy + kraenk_5_dummy,
    # victim factors at increasingly strict thresholds:
    # more than 0, more than 1 and more than 2 experiences
    victim = factor(ifelse(victimhood_intensity > 0, "Victim", "Non-victim")),
    victim_2 = factor(ifelse(victimhood_intensity > 1, "Victim", "Non-victim")),
    victim_3 = factor(ifelse(victimhood_intensity > 2, "Victim", "Non-victim"))
  )

# Merge the respondent-level background variables into the long df.
# The key is given explicitly: id is the respondent identifier, and naming it
# keeps the merge from silently also matching on any other column name the
# two data frames might happen to share.
df_long <- df_long %>%
  left_join(df_background, by = "id")



# save data: the long analysis df and the respondent-level background df
save(df_long, file = "data_genderedcost_long.rdata")
save(df_background, file = "data_genderedcost_background.rdata")



